#### Load libraries ####
library(survey)
library(forcats)
library(haven)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggfittext)
library(margins)
library(effects)
library(nnet) # for multinomial logit
library(svyVGAM)
library(stargazer)
library(broom)
library(scales)
library(rlang)
library(stringr)


#### Define key fixed variables ####
'%!in%' <- function(x,y)!('%in%'(x,y))
source("helper_functions.R")

# data_dir <- "Downloads/reexternalsurvey"
data_dir <- "."
data_file <- "caltech_climate_junejuly24.sav"


variables_in_model <- c("age4", "gender2", "race4", "educ4", "pid3")
                        #"region", "urbancity4", "ideo3", "newsint4", "populism", "conspiracy"
variables_in_model_labels <- c("Age_Group", "Gender", "Race_Ethnicity", "Education_Level", "Party_Identification")
                               # , "Region", "Urban_Rural", "Ideology", "Political_Interest","Populism_Score", "Conspiracy_Score"
# Create a list of nice labels for your variables
variable_labels <- list(
  age4 = "Age_Group",
  gender2 = "Gender",
  race4 = "Race_Ethnicity",
  educ4 = "Education_Level",
  pid3 = "Party_Identification",
  ideo3 = "Ideology",
  region = "Region",
  urbancity4 = "Urban_Rural",
  newsint4 = "Political_Interest",
  populism = "Populism_Score",
  conspiracy = "Conspiracy_Score",
  populism_bin = "Populism_Bin",
  conspiracy_bin = "Conspiracy_Bin",
  law = "Total_Election_Crimes",
  law2_q1 = "Multiple_Voting",
  law2_q2 = "Ballot_Tampering",
  law2_q3 = "Impersonation",
  law2_q4 = "Non_Citizen_Voting",
  law2_q5 = "Mail_Ballot_Fraud",
  law2_q6 = "Official_Tampering",
  law2_q7 = "Software_Hacking",
  law2_q8 = "Paying_Voters",
  law2_q9 = "Registration_Fraud",
  law2_q10 = "Dropbox_Fraud"
)

law_vars <- c("law2_q1", "law2_q2", "law2_q3", "law2_q4", "law2_q5", 
  "law2_q6", "law2_q7", "law2_q8", "law2_q9", "law2_q10")
# law_var_labels <- c("Voting More Than Once", "Ballot Tampering", "Voter Impersonation", 
#   "Non-Citizen Voting", "Mail Voting Fraud", "Tampering With Results", "Software Manipulation", 
#   "Paying For Votes", "Voter Registration Fraud", "Dropbox Stuffing")
law_var_labels <- c("Multiple Voting", "Ballot Tampering", "Impersonation", 
  "Non-Citizen Voting", "Mail Ballot Fraud", "Official Tampering", 
  "Software Hacking", "Paying Voters", "Registration Fraud", "Dropbox Fraud")



variables_in_model_labels <- variable_labels[variables_in_model] %>% unlist() %>% as.character()
law_var_labels <- variable_labels[law_vars] %>% unlist() %>% as.character()
all_var_labels <- c(law_var_labels, variables_in_model_labels)


rating_max <- 5
rating_min <- 1
total_questions <- 10
total_conspiracy_questions <- 6
total_populism_questions <- 6

#### Define functions ####
# vector version for "common" proportion

clean_str <- function(x){
  stringr::str_replace_all(x, "_", " ")
}
# prop <- function(x, val=1){
#   total <- sum(!is.na(x))
#   prop <- sum(x == val, na.rm=TRUE) / total
#   return(prop)
# }
# dt[,prop(law2_q1, "Common"),.(Political_Interest, Ideology)][order(Political_Interest, Ideology)]
# Function to calculate proportions
calculate_proportions <- function(x_var, y_var, design) {
  props <- prop.table(svytable(as.formula(paste("~", x_var, "+", y_var)), design), 1)
  props_df <- as.data.frame(props)
  names(props_df) <- c("Group", "Response", "Proportion")
  # long to wide
  props_df_wide <- props_df %>% pivot_wider(names_from = Response, values_from = Proportion)
  return(props_df_wide)
}
law_prop <- function(x_var, law_var, design){
# law_prop <- function(x_var, law_num, design){
  # law_var <- paste0("law2_q", law_num)
  calculate_proportions(x_var, law_var, design)
}

print_law_props <- function(x_var, design, labels=law_var_labels){
  # for (i in 1:10){
  for (lab in labels){
    print(sprintf("Proportion of responses by %s for %s", x_var, lab))
    print(law_prop(x_var, lab, design))
  }
}
# Function to label regression coefficients with labels
get_labels <- function(dat_final, var){
  labels <- levels(dat_final[[var]])[-1]
  if (is.null(labels)){
    labels <- var
  }
  return(labels)
}
label_interaction <- function(dat_final, var1, var2){
  levels1 <- get_labels(dat_final, var1)
  levels2 <- get_labels(dat_final, var2)
  id_labels <- expand.grid(levels1, levels2)
  id_labels <- paste(id_labels$Var1, id_labels$Var2, sep = ": ")
  return(id_labels)
}


# Function to run logistic regression with optional interactions
run_regression <- function(outcome, predictors, design, family=quasibinomial(link='logit'), interactions = NULL) {
  # 
  # Create the main formula
  main_formula <- paste(predictors, collapse = " + ")
  
  # Add interactions if specified
  if (!is.null(interactions)) {
    interaction_terms <- paste(interactions, collapse = " * ")
    formula <- as.formula(paste(outcome, "~", main_formula, "+", interaction_terms))
  } else {
    formula <- as.formula(paste(outcome, "~", main_formula))
  }
  
  # Run the model
  model <- svyglm(formula, design = design, family = family)
  return(model)
}

create_logit_summary_table <- function(models, 
                                       title = "Logistic Regression Results", 
                                       dep.var.labels = NULL,
                                       covariate.labels = NULL,
                                       out.file = NULL,
                                       label="") {
  
  # Function to extract coefficients and standard errors
  extract_coef_se <- function(model) {
    coef_summary <- summary(model)$coefficients
    coef <- coef_summary[, "Estimate"]
    se <- coef_summary[, "Std. Error"]
    return(list(coef = coef, se = se))
  }
  
  # Extract coefficients and standard errors for each model
  model_data <- lapply(models, extract_coef_se)
  
  # Create the LaTeX table
  stargazer_output <- stargazer(
    models,
    title = title,
    dep.var.labels = dep.var.labels,
    covariate.labels = covariate.labels,
    coef = lapply(model_data, function(x) x$coef),
    se = lapply(model_data, function(x) x$se),
    type = "latex",
    style = "default",
    model.numbers = TRUE,
    font.size = "footnotesize",
    single.row = FALSE,
    no.space = TRUE,
    table.placement = "h",
    digits = 3,
    star.cutoffs = c(0.05, 0.01, 0.001),
    ci = FALSE,
    header = FALSE, 
    label=label
  )
  
  # Write to file if specified
  if (!is.null(out.file)) {
    cat(stargazer_output, file = out.file, sep = "\n")
    cat("LaTeX table has been written to", out.file, "\n")
  }
  
  # Return the LaTeX code as a character vector
  return(stargazer_output)
}

# plot(law_me, ylab = "Proportion of Rumors Endorsed")

effect_plot <- function(reg_res, ylab, ymax=1, save_path=NULL, width=15, height=8, dpi=900){
    if (!is.null(save_path)){
      save_path <- here::here(save_path)
    }

    plot_obj = plot(reg_res, 
     axes=list(
       y=list(
         lab=ylab,
         type="response",  # Ensure we're on the response scale
         lim=c(0.0, ymax)  # Adjust these values based on your data range
       ),
       x=list(
         rotate=45  # Rotate x-axis labels
       ),
       grid=TRUE
     ),
     lines=list(col="blue"),
     confint=list(style="bars"),
    #  main="Effects on Proportion of Rumors Endorsed",
     key.args=list(space="right", columns=1),
     plot = FALSE
    )


    if (!is.null(save_path)) {
        # Determine file type from save_path
        file_ext <- tolower(tools::file_ext(save_path))
        
        # Set up the appropriate device
        if (file_ext == "pdf") {
            pdf(save_path, width=width, height=height)
        } else if (file_ext %in% c("png", "jpg", "jpeg", "tiff")) {
            if (file_ext == "jpg") file_ext <- "jpeg"
            do.call(file_ext, list(filename=save_path, width=width, height=height, units="in", res=dpi))
        } else {
            stop("Unsupported file type. Please use pdf, png, jpg, or tiff.")
        }
        
        # Print the plot
        print(plot_obj)
        
        # Close the device
        dev.off()
        
        cat("Plot saved to", save_path, "\n")
    }
    return(plot_obj)
}


plot_conspiracy_beliefs <- function(data, y_vars, y_var_labels=y_vars, by_var = NULL, by_var2 = NULL, weighted=TRUE) {
  
  by_var <- enquo(by_var)
  by_var2 <- enquo(by_var2)

  has_weight <- "weight" %in% names(data) & weighted

  # Prepare the data
  plot_data <- data %>%
    dplyr::select(all_of(y_vars), !!by_var, !!by_var2) %>%
    {if (has_weight) dplyr::bind_cols(., dplyr::select(data, weight)) else .} %>%
    pivot_longer(cols = all_of(y_vars), names_to = "law_type", values_to = "belief") %>%
    filter(!is.na(belief))

  # Filter out NAs for by_vars
  if (!quo_is_null(by_var)) {
    plot_data <- plot_data %>% filter(!is.na(!!by_var))
  }
  if (!quo_is_null(by_var2)) {
    plot_data <- plot_data %>% filter(!is.na(!!by_var2))
  }

  overall_freq <- plot_data %>%
    group_by(law_type) %>%
    dplyr::summarise(
      common_freq = sum(if(has_weight) {
        as.numeric(belief == "Common") * weight
      } else {
        as.numeric(belief == "Common")
      }, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(common_freq)

  plot_data <- plot_data %>%
    dplyr::mutate(
      belief = fct_relevel(belief, "Common", "Not Common"),
      law_type = factor(law_type, 
                        levels = overall_freq$law_type, 
                        labels = y_var_labels[match(overall_freq$law_type, paste0("law2_q", 1:length(y_vars)))])
    )

  # Calculate total sample size
  total_sample_size <- nrow(data)

  # Group by variables
  group_vars <- c(quo(law_type))
  if (!quo_is_null(by_var)) group_vars <- c(group_vars, by_var)
  if (!quo_is_null(by_var2)) group_vars <- c(group_vars, by_var2)
  label_size <- 6.0
  if (!quo_is_null(by_var)) label_size <- label_size - 0.5
  if (!quo_is_null(by_var2)) label_size <- label_size - 1.5
  all_vars <- c(group_vars, quo(belief))
  
  plot_data <- plot_data %>%
    group_by(!!!all_vars) %>%
    dplyr::summarise(
      count = if (has_weight) sum(weight, na.rm=TRUE) else n(),
      .groups = "drop"
      ) %>%
    ungroup() %>%
    group_by(!!!group_vars) %>%
    dplyr::mutate(proportion = count / sum(count)) %>%
    dplyr::mutate(count = round(count)) %>%
    ungroup()

  # Calculate alpha based on overall count distribution
  count_breaks <- c(1, 5, 10, 15, 20, 25, 30, 50, 100)
  alpha_breaks <- scales::rescale(count_breaks, to = c(0.3, 1))
  
  plot_data <- plot_data %>%
    dplyr::mutate(alpha = scales::rescale(count, 
                                   from = c(min(count_breaks), max(count_breaks)),
                                   to = c(min(alpha_breaks), max(alpha_breaks))))

  # Define colors
  belief_colors <- c("Common" = "#FF6B6B", "Not Common" = "#4ECDC4")
  
  # Create the plot
  p <- ggplot(plot_data, aes(x = law_type, y = count, fill = belief)) +
    geom_bar(stat = "identity", position = "fill", aes(alpha = alpha)) +
    scale_y_continuous(labels = scales::percent) +
    scale_fill_manual(values = belief_colors, 
                      name = "Belief",
                      labels = c("Common", "Not Common")) +
    scale_alpha_continuous(range = c(0.3, 1), guide = "none") +
    geom_bar_text(aes(
              label = sprintf("%.1f%% (%d)", proportion * 100, count),
              group = belief
              ),
            position = "fill",
          reflow=TRUE,
          grow=FALSE,
          place = "left",
          color = "black") +
    # geom_text(aes(
    #           label = sprintf("%.1f%% (%d)", proportion * 100, count),
    #           group = belief,
    #           hjust = ifelse(belief == "Common", 0.85, 0.2),
    #           ),
    #       position = position_fill(
    #         vjust = 0.5),
    #         # hjust = ifelse(plot_data$belief == "Common", 0, 1),
    #         # hjust = "inward",
    #       color = "black", size = label_size) +
    # geom_text(aes(label = sprintf("%.1f%% (%d)", proportion * 100, count), 
    #           y = ifelse(belief == "Common", proportion/2, 1 - proportion/2)),
    #       position = position_fill(vjust = 0.5),
    #       color = "black", size = 4.5) +
    labs(title = "Frequency of Beliefs About Various Types of Election Fraud", 
        subtitle = ifelse(
              quo_is_null(by_var) && quo_is_null(by_var2) && !has_weight, "", 
              paste(ifelse(!quo_is_null(by_var) | !quo_is_null(by_var2), "by", ""), 
                    ifelse(!quo_is_null(by_var), quo_name(by_var) %>% clean_str(), ""), 
                    ifelse(!quo_is_null(by_var2), paste("and", quo_name(by_var2) %>% clean_str()), ""),
                    ifelse(has_weight, "(Weighted by Sampling Weights)", "")
                    )
                    ),
         x = "Type of Election Fraud",
         y = "Frequency") +
    theme_minimal()

  # Add faceting based on by_var and by_var2
  if (!quo_is_null(by_var) && !quo_is_null(by_var2)) {
    p <- p + facet_grid(rows = vars(!!by_var), cols = vars(!!by_var2), scales = "free")
  } else if (!quo_is_null(by_var)) {
    p <- p + facet_wrap(vars(!!by_var), scales = "free_y", ncol = 1)
  } else if (!quo_is_null(by_var2)) {
    p <- p + facet_wrap(vars(!!by_var2), scales = "free_y", ncol = 1)
  }

  p <- p + theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    plot.title = element_text(size=24),
    plot.title.position = "plot",
    plot.subtitle = element_text(size=20),
    axis.title = element_text(size=20),
    axis.text = element_text(size=15),
    strip.text = element_text(size = 20),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 60, unit = "pt")
    )

  p <- p + coord_flip(clip="off")  # Flip coordinates to put "not common" on the bottom

  return(p)
}

#### Prepare data ####
caltech_climate_junejuly24 <- read_sav(file.path(data_dir, data_file))

#Initial recodes for tables
dat <- caltech_climate_junejuly24 %>%
    mutate(
        gender2 = case_when(gender4 == 1 ~ 1,
                            gender4 == 2 ~ 2),
        urbancity4 = case_when(urbancity == 1 ~ 1,
                               urbancity == 2 ~ 2,
                               urbancity == 3 ~ 3,
                               urbancity == 4 ~ 4),
        newsint4 = case_when(newsint==1 ~ 1,
                             newsint==2 ~ 2,
                             newsint==3 ~ 3,
                             newsint==4 ~ 4),
        Q26_3 = na_if(Q26_3, 8),
        Q26_4 = na_if(Q26_4, 8),
        Q26_5 = na_if(Q26_5, 8),
        Q26_6 = na_if(Q26_6, 8),
        # reverse populism so that 1 is strongly disagree and 5 strongly agree (so 5 is high populism)
        populism_q1 = rating_max + rating_min - populism_1,
        populism_q2 = rating_max + rating_min - populism_2,
        populism_q3 = rating_max + rating_min - populism_3,
        populism_q4 = rating_max + rating_min - populism_4,
        populism_q5 = rating_max + rating_min - populism_5,
        populism_q6 = rating_max + rating_min - populism_6,
        # rename conspiracy questions
        conspiracy_q1 = Q26_1,
        conspiracy_q2 = Q26_2,
        conspiracy_q3 = Q26_3,
        conspiracy_q4 = Q26_4,
        conspiracy_q5 = Q26_5,
        conspiracy_q6 = Q26_6,
        # reverse conspiracy questions so that 1 is high conspiracy thinking and 5 low
        conspiracy_q1 = rating_max + rating_min - conspiracy_q1,
        conspiracy_q2 = rating_max + rating_min - conspiracy_q2,
        conspiracy_q3 = rating_max + rating_min - conspiracy_q3,
        conspiracy_q4 = rating_max + rating_min - conspiracy_q4,
        conspiracy_q5 = rating_max + rating_min - conspiracy_q5,
        conspiracy_q6 = rating_max + rating_min - conspiracy_q6,
        # code law questions to be on 3-point scale
        law3_q1 = case_when(law_1 == 1 | law_1 == 2 ~ 1,
                            law_1 == 3 | law_1 == 4 ~ 2,
                            law_1 == 5 ~ 3),
        law3_q2 = case_when(law_2 == 1 | law_2 == 2 ~ 1,
                            law_2 == 3 | law_2 == 4 ~ 2,
                            law_2 == 5 ~ 3),
        law3_q3 = case_when(law_3 == 1 | law_3 == 2 ~ 1,
                            law_3 == 3 | law_3 == 4 ~ 2,
                            law_3 == 5 ~ 3),
        law3_q4 = case_when(law_4 == 1 | law_4 == 2 ~ 1,
                            law_4 == 3 | law_4 == 4 ~ 2,
                            law_4 == 5 ~ 3),
        law3_q5 = case_when(law_5 == 1 | law_5 == 2 ~ 1,
                            law_5 == 3 | law_5 == 4 ~ 2,
                            law_5 == 5 ~ 3),    
        law3_q6 = case_when(law_6 == 1 | law_6 == 2 ~ 1,
                            law_6 == 3 | law_6 == 4 ~ 2,
                            law_6 == 5 ~ 3),
        law3_q7 = case_when(law_7 == 1 | law_7 == 2 ~ 1,
                            law_7 == 3 | law_7 == 4 ~ 2,
                            law_7 == 5 ~ 3),
        law3_q8 = case_when(law_8 == 1 | law_8 == 2 ~ 1,
                            law_8 == 3 | law_8 == 4 ~ 2,
                            law_8 == 5 ~ 3),
        law3_q9 = case_when(law_9 == 1 | law_9 == 2 ~ 1,
                            law_9 == 3 | law_9 == 4 ~ 2,
                            law_9 == 5 ~ 3),
        law3_q10 = case_when(law_10 == 1 | law_10 == 2 ~ 1,
                             law_10 == 3 | law_10 == 4 ~ 2,
                             law_10 == 5 ~ 3),
        # code law questions to be on binary scale
        law2_q1 = case_when(law_1 == 1 | law_1 == 2 ~ 1,
                            law_1 == 3 | law_1 == 4 ~ 0),
        law2_q2 = case_when(law_2 == 1 | law_2 == 2 ~ 1,
                            law_2 == 3 | law_2 == 4 ~ 0),
        law2_q3 = case_when(law_3 == 1 | law_3 == 2 ~ 1,
                            law_3 == 3 | law_3 == 4 ~ 0),
        law2_q4 = case_when(law_4 == 1 | law_4 == 2 ~ 1,
                            law_4 == 3 | law_4 == 4 ~ 0),
        law2_q5 = case_when(law_5 == 1 | law_5 == 2 ~ 1,
                            law_5 == 3 | law_5 == 4 ~ 0),   
        law2_q6 = case_when(law_6 == 1 | law_6 == 2 ~ 1,
                            law_6 == 3 | law_6 == 4 ~ 0),
        law2_q7 = case_when(law_7 == 1 | law_7 == 2 ~ 1,
                            law_7 == 3 | law_7 == 4 ~ 0),
        law2_q8 = case_when(law_8 == 1 | law_8 == 2 ~ 1,
                            law_8 == 3 | law_8 == 4 ~ 0),
        law2_q9 = case_when(law_9 == 1 | law_9 == 2 ~ 1,
                            law_9 == 3 | law_9 == 4 ~ 0),
        law2_q10 = case_when(law_10 == 1 | law_10 == 2 ~ 1,
                             law_10 == 3 | law_10 == 4 ~ 0),
        # recode political id
        pid3 = case_when(pid3 == 1 ~ 1,
                         pid3 == 2 ~ 2,
                         pid3 >= 3 ~ 3 # collapse "Independent" = "3", "Other party" = "4", "Not sure of party" = "5"
                         ),
        # recode ideology
        ideo3 = case_when(ideo3 == 1 ~ 1,
                          ideo3 == 2 ~ 2,
                          ideo3 == 3 ~ 3,
                          ideo3 > 3 ~ NA # remove "Not sure of ideology" = "4"
                          ),
        )

populism_bins <- seq(rating_min+rating_max-1, rating_max * total_populism_questions, by = rating_max)
npbins <- length(populism_bins)
# populism_bin_labels <- c("6-10", "11-15", "16-20", "21-25", "26-30")
populism_bin_labels <- data.frame(start = populism_bins[1:(npbins-1)]+1, end = populism_bins[2:npbins]) %>%
  mutate(label = paste(start, end, sep = "-")) %>% pull(label)
conspiracy_bins <- seq(rating_min+rating_max-1, rating_max * total_conspiracy_questions, by = rating_max)
# conspiracy_bin_labels <- c("6-10", "11-15", "16-20", "21-25", "26-30")
ncbins <- length(conspiracy_bins)
conspiracy_bin_labels <- data.frame(start = conspiracy_bins[1:(ncbins-1)]+1, end = conspiracy_bins[2:ncbins]) %>%
  mutate(label = paste(start, end, sep = "-")) %>% pull(label)

dat <- dat %>%
  dplyr::mutate(
    populism = rowSums(dplyr::select(., starts_with("populism_q")), na.rm = TRUE),
    conspiracy = rowSums(dplyr::select(., starts_with("conspiracy_q")), na.rm = TRUE),
    law = rowSums(dplyr::select(., starts_with("law2_q")), na.rm = TRUE)
  ) %>%
  dplyr::mutate(populism_bin = cut(populism, breaks = populism_bins, labels = populism_bin_labels),
         conspiracy_bin = cut(conspiracy, breaks = conspiracy_bins, labels = conspiracy_bin_labels))


# Remove rows with missing values in any of the variables used in the model
complete_cases <- complete.cases(dat[,variables_in_model])

dat_final <- subset(dat, complete_cases)

dat_final <- dat_final %>% mutate(
  across(c(gender2, urbancity4, age4, race4, educ4, pid3, ideo3, region, newsint4, populism_bin, conspiracy_bin), as.factor),
  across(c(populism, conspiracy), as.numeric),
  gender2 = fct_recode(gender2, "Male" = "1", "Female" = "2"),
  age4 = fct_recode(age4, "Under 30" = "1", "30-44" = "2", "45-64" = "3", "65+" = "4"),
  age4 = fct_relevel(age4, "Under 30", "30-44", "45-64", "65+"),
  race4 = fct_recode(race4, "White" = "1", "Black" = "2", "Hispanic" = "3", "Other" = "4"),
  educ4 = fct_recode(educ4, "HS or less" = "1", "Some college" = "2", "College grad" = "3", "Postgrad" = "4"),
  educ4 = fct_relevel(educ4, "HS or less", "Some college", "College grad", "Postgrad"),
  pid3 = fct_recode(pid3, "Democrat" = "1", "Republican" = "2", "Independent" = "3"),
  pid3 = fct_relevel(pid3, "Democrat", "Republican", "Independent"),
  ideo3 = fct_recode(ideo3, "Liberal" = "1", "Moderate" = "2", "Conservative" = "3"),
  ideo3 = fct_relevel(ideo3, "Liberal", "Moderate", "Conservative"),
  region = fct_recode(region, "Northeast" = "1", "Midwest" = "2", "South" = "3", "West" = "4"),
  urbancity4 = fct_recode(urbancity4, "City" = "1", "Suburb" = "2", "Town" = "3", "Rural area" = "4"),
  newsint4 = fct_recode(newsint4, "Follows politics most of the time" = "1", "Follows politics some of the time" = "2", "Follows politics only now and then" = "3", "Follows politics hardly at all" = "4"),
  # Recode law variables
  across(starts_with("law2_q"), 
           ~factor(., levels = c(0, 1), labels = c("Not Common", "Common"))),
  across(starts_with("law2_q"), 
          ~fct_relevel(., "Not Common", "Common")),
  total_questions = total_questions
)

# Prepare data and create survey design
rename_variables <- function(data, label_list) {
  new_names <- sapply(names(data), function(x) {
    if (x %in% names(label_list)) label_list[[x]] else x
  })
  setNames(data, new_names)
}

# Rename the variables in dat_final
dat_final <- rename_variables(dat_final, variable_labels)

svy_design <- svydesign(data = dat_final, weights = ~weight, id = ~1)

#### Create summary tables and plots ####

##### PARTY ID #####
# Proportion of respondents for each party ID
round(prop.table(svytable(~Party_Identification, svy_design)),2)
# Law Responses by Party ID
print_law_props("Party_Identification", svy_design)

#### IDEOLOGY #####
# Proportion of respondents for each ideology
round(prop.table(svytable(~Ideology, svy_design)),2)
# Law Responses by Ideology
print_law_props("Ideology", svy_design)

#### GENDER #####
# Proportion of respondents for each gender
round(prop.table(svytable(~Gender, svy_design)),2)
# Law Responses
print_law_props("Gender", svy_design)

#### AGE #####
# Proportion of respondents for each age group
round(prop.table(svytable(~Age_Group, svy_design)),2)
# Law Responses by Age
print_law_props("Age_Group", svy_design)

#### EDUCATION #####
# Proportion of respondents for each education level
round(prop.table(svytable(~Education_Level, svy_design)),2)
# Law Responses by Education
print_law_props("Education_Level", svy_design)

#### RACE ####
# Proportion of respondents for each race
round(prop.table(svytable(~Race_Ethnicity, svy_design)),2)
# Law Responses by Race
print_law_props("Race_Ethnicity", svy_design)

#### REGION ####
# Proportion of respondents for each region
round(prop.table(svytable(~Region, svy_design)),2)
# Law Responses by Region
print_law_props("Region", svy_design)

#### URBAN/RURAL ####
# Proportion of respondents for each urban/rural category
round(prop.table(svytable(~Urban_Rural, svy_design)),2)
# Law Responses by Urban/Rural
print_law_props("Urban_Rural", svy_design)

#### POPULISM SCORE ####
# Proportion of respondents for each populism score
round(prop.table(svytable(~Populism_Bin, svy_design)),2)
print_law_props("Populism_Bin", svy_design)

#### CONSPIRACY SCORE ####
# Proportion of respondents for each conspiracy score
round(prop.table(svytable(~Conspiracy_Bin, svy_design)),2)
print_law_props("Conspiracy_Bin", svy_design)

#### NEWS INTEREST ####
# Proportion of respondents for each news interest level
round(prop.table(svytable(~Political_Interest, svy_design)),2)
print_law_props("Political_Interest", svy_design)



#### SUMMARY STATISTICS ####
numeric_vars <- c("Populism_Score", "Conspiracy_Score", "Total_Election_Crimes")
factor_vars <- all_var_labels[all_var_labels %!in% c(numeric_vars, law_var_labels)]

# For weighted statistics
summary_stats_weighted <- generate_summary_stats(dat_final, 
                                                 vars_to_summarize = numeric_vars, 
                                                 weight_var = weight,
                                                 include_factors = FALSE)

weighted_latex_table <- create_latex_table(summary_stats_weighted, 
                                    filename = paste0("summary_statistics_", "numeric_weighted", ".tex"), 
                                    title = paste("Summary Statistics -", "Weighted Numeric Variables"), 
                                    note = "")     
# For unweighted statistics
summary_stats_unweighted <- generate_summary_stats(dat_final, 
                                                   vars_to_summarize = numeric_vars, 
                                                   weight_var = NULL,
                                                   include_factors = FALSE)

unweighted_latex_table <- create_latex_table(summary_stats_unweighted, 
                                    filename = paste0("summary_statistics_", "numeric_unweighted", ".tex"), 
                                    title = paste("Summary Statistics -", "Unweighted Numeric Variables")
                                    )

# Generating factor summaries
for (var in factor_vars) {
  print(var)
  factor_summary_stats <- generate_factor_summary(dat_final, var)
  latex_table <- create_latex_table(factor_summary_stats, 
                                    filename = paste0("summary_statistics_", tolower(var), ".tex"), 
                                    title = paste("Summary Statistics -", var)
                                    )                                              
}

law_summary_stats <- data.table()
for (var in law_var_labels) {
  print(var)
  law_summary_stat <- generate_factor_summary(dat_final, var)
  law_summary_stats <- rbindlist(list(law_summary_stats, law_summary_stat))                                         
}

law_latex_table <- create_latex_table(law_summary_stats, 
                                    filename = paste0("summary_statistics_", "law", ".tex"), 
                                    title = paste("Summary Statistics -", "Election Crimes")
                                    )     


#### RUN REGRESSIONS ####
law1_logit <- run_regression(law_var_labels[1], variables_in_model_labels, svy_design)
law2_logit <- run_regression(law_var_labels[2], variables_in_model_labels, svy_design)
law3_logit <- run_regression(law_var_labels[3], variables_in_model_labels, svy_design)
law4_logit <- run_regression(law_var_labels[4], variables_in_model_labels, svy_design)
law5_logit <- run_regression(law_var_labels[5], variables_in_model_labels, svy_design)
law6_logit <- run_regression(law_var_labels[6], variables_in_model_labels, svy_design)
law7_logit <- run_regression(law_var_labels[7], variables_in_model_labels, svy_design)
law8_logit <- run_regression(law_var_labels[8], variables_in_model_labels, svy_design)
law9_logit <- run_regression(law_var_labels[9], variables_in_model_labels, svy_design)
law10_logit <- run_regression(law_var_labels[10], variables_in_model_labels, svy_design)

### AGGREGATE LAW RESPONSES ###
full_linear_model <- run_regression(variable_labels$law, variables_in_model_labels, svy_design, family=gaussian)
full_prop_model <- run_regression(paste(variable_labels$law, total_questions, sep = "/"), variables_in_model_labels, svy_design, family=quasibinomial(link = "logit"))


#### CALCULATE MARGINAL EFFECTS ####
law1_me <- allEffects(law1_logit)
law2_me <- allEffects(law2_logit)
law3_me <- allEffects(law3_logit)
law4_me <- allEffects(law4_logit)
law5_me <- allEffects(law5_logit)
law6_me <- allEffects(law6_logit)
law7_me <- allEffects(law7_logit)
law8_me <- allEffects(law8_logit)
law9_me <- allEffects(law9_logit)
law10_me <- allEffects(law10_logit)

law_me <- allEffects(full_linear_model)

law_plot <- effect_plot(law_me, "Number of Rumors Endorsed", save_path="law_me_plot.png", 
  width = 18, height = 8, dpi = 900)
print(law_plot)

law_me_prop <- allEffects(full_prop_model)
law_prop_plot <- effect_plot(law_me_prop, "Proportion of Rumors Endorsed", ymax=1.0, save_path="law_me_prop_plot.png", 
  width = 18, height = 8, dpi = 900)
print(law_plot)

effect_plot(law1_me, sprintf("Belief in Rumor: %s", law_var_labels[1]), ymax=1.0, save_path="law1_me_plot.png")
effect_plot(law2_me, sprintf("Belief in Rumor: %s", law_var_labels[2]), ymax=1.0, save_path="law2_me_plot.png")
effect_plot(law3_me, sprintf("Belief in Rumor: %s", law_var_labels[3]), ymax=1.0, save_path="law3_me_plot.png")
effect_plot(law4_me, sprintf("Belief in Rumor: %s", law_var_labels[4]), ymax=1.0, save_path="law4_me_plot.png")
effect_plot(law5_me, sprintf("Belief in Rumor: %s", law_var_labels[5]), ymax=1.0, save_path="law5_me_plot.png")
effect_plot(law6_me, sprintf("Belief in Rumor: %s", law_var_labels[6]), ymax=1.0, save_path="law6_me_plot.png")
effect_plot(law7_me, sprintf("Belief in Rumor: %s", law_var_labels[7]), ymax=1.0, save_path="law7_me_plot.png")
effect_plot(law8_me, sprintf("Belief in Rumor: %s", law_var_labels[8]), ymax=1.0, save_path="law8_me_plot.png")
effect_plot(law9_me, sprintf("Belief in Rumor: %s", law_var_labels[9]), ymax=1.0, save_path="law9_me_plot.png")
effect_plot(law10_me, sprintf("Belief in Rumor: %s", law_var_labels[10]), ymax=1.0, save_path="law10_me_plot.png")

#### REGRESSIONS ####
# create labels for covariates

covariate_labels <- c()
for (var in variables_in_model_labels){
  covariate_labels <- c(covariate_labels, get_labels(dat_final, var))
}
covariate_labels <- stringr::str_replace_all(covariate_labels, "_", " ")

law_regs_table1 <- create_logit_summary_table(
  models = list(law1_logit, law2_logit, law3_logit, law4_logit),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[1:4],
  covariate.labels = covariate_labels,
  out.file = "law_regs_table1.tex",
  label="tab:law_regs_table1"
)


law_regs_table2 <- create_logit_summary_table(
  models = list(law5_logit, law6_logit, law7_logit),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[5:7],
  covariate.labels = covariate_labels,
  out.file = "law_regs_table2.tex",
  label="tab:law_regs_table2"
)


law_regs_table3 <- create_logit_summary_table(
  models = list(law8_logit, law9_logit, law10_logit),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[8:10],
  covariate.labels = covariate_labels,
  out.file = "law_regs_table3.tex",
  label="tab:law_regs_table3"
)

total_law_regs_table <- create_logit_summary_table(
  models = list(full_linear_model, full_prop_model),
  title = "Regression Results for Aggregate Answers to Election Crime Questions",
  dep.var.labels = c("Number of Election Crimes Believed", "Proportion of Election Crimes Believed"),
  covariate.labels = covariate_labels,
  out.file = "total_law_reg_mini.tex",
  label="tab:total_law_reg_mini"
)

#### INTERACTIONS ####


# Which variables to include interactions between
int_vars <- c("Party_Identification", "Political_Interest")

law1_logit_int <- run_regression("law2_q1", variables_in_model_labels, svy_design, interactions = int_vars)
law2_logit_int <- run_regression("law2_q2", variables_in_model_labels, svy_design, interactions = int_vars)
law3_logit_int <- run_regression("law2_q3", variables_in_model_labels, svy_design, interactions = int_vars)
law4_logit_int <- run_regression("law2_q4", variables_in_model_labels, svy_design, interactions = int_vars)
law5_logit_int <- run_regression("law2_q5", variables_in_model_labels, svy_design, interactions = int_vars)
law6_logit_int <- run_regression("law2_q6", variables_in_model_labels, svy_design, interactions = int_vars)
law7_logit_int <- run_regression("law2_q7", variables_in_model_labels, svy_design, interactions = int_vars)
law8_logit_int <- run_regression("law2_q8", variables_in_model_labels, svy_design, interactions = int_vars)
law9_logit_int <- run_regression("law2_q9", variables_in_model_labels, svy_design, interactions = int_vars)
law10_logit_int <- run_regression("law2_q10", variables_in_model_labels, svy_design, interactions = int_vars)

### AGGREGATE LAW RESPONSES ###
full_linear_model_int <- run_regression("law", variables_in_model_labels, svy_design, interactions = int_vars, family=gaussian)
full_prop_model_int <- run_regression(paste("law", total_questions, sep = "/"), variables_in_model_labels, svy_design, interactions = int_vars, family=quasibinomial(link = "logit"))


int_id_labels <- label_interaction(dat_final, int_vars[1], int_vars[2])

covariate_labels_int <- c(
    covariate_labels,
    int_id_labels
  )
covariate_labels_int <- stringr::str_replace_all(covariate_labels_int, "_", " ")

law_regs_table1_int <- create_logit_summary_table(
  models = list(law1_logit_int, law2_logit_int, law3_logit_int, law4_logit_int),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[1:4],
  covariate.labels = covariate_labels_int,
  out.file = "law_regs_table1_int.tex",
  label="tab:law_regs_table1_int"
)

law_regs_table2_int <- create_logit_summary_table(
  models = list(law5_logit_int, law6_logit_int, law7_logit_int),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[5:7],
  covariate.labels = covariate_labels_int,
  out.file = "law_regs_table2_int.tex",
  label="tab:law_regs_table2_int"
)

law_regs_table3_int <- create_logit_summary_table(
  models = list(law8_logit_int, law9_logit_int, law10_logit_int),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[8:10],
  covariate.labels = covariate_labels_int,
  out.file = "law_regs_table3_int.tex",
  label="tab:law_regs_table3_int"
)


total_law_regs_table_int <- create_logit_summary_table(
  models = list(full_linear_model_int, full_prop_model_int),
  title = "Regression Results for Aggregate Answers to Election Crime Questions",
  dep.var.labels = c("Number of Election Crimes Believed", "Proportion of Election Crimes Believed"),
  covariate.labels = covariate_labels_int,
  out.file = "total_law_reg_int.tex",
  label="tab:total_law_reg_int"
)

#### SECOND SET OF INTERACTIONS ####
int_vars2 <- c("Urban_Rural", "Ideology") 


law1_logit_int2 <- run_regression("law2_q1", variables_in_model_labels, svy_design, interactions = int_vars2)
law2_logit_int2 <- run_regression("law2_q2", variables_in_model_labels, svy_design, interactions = int_vars2)
law3_logit_int2 <- run_regression("law2_q3", variables_in_model_labels, svy_design, interactions = int_vars2)
law4_logit_int2 <- run_regression("law2_q4", variables_in_model_labels, svy_design, interactions = int_vars2)
law5_logit_int2 <- run_regression("law2_q5", variables_in_model_labels, svy_design, interactions = int_vars2)
law6_logit_int2 <- run_regression("law2_q6", variables_in_model_labels, svy_design, interactions = int_vars2)
law7_logit_int2 <- run_regression("law2_q7", variables_in_model_labels, svy_design, interactions = int_vars2)
law8_logit_int2 <- run_regression("law2_q8", variables_in_model_labels, svy_design, interactions = int_vars2)
law9_logit_int2 <- run_regression("law2_q9", variables_in_model_labels, svy_design, interactions = int_vars2)
law10_logit_int2 <- run_regression("law2_q10", variables_in_model_labels, svy_design, interactions = int_vars2)

### AGGREGATE LAW RESPONSES ###
full_linear_model_int2 <- run_regression("law", variables_in_model_labels, svy_design, interactions = int_vars2, family=gaussian)
full_prop_model_int2 <- run_regression(paste("law", total_questions, sep = "/"), variables_in_model_labels, svy_design, interactions = int_vars2, family=quasibinomial(link = "logit"))


int_id_labels2 <- label_interaction(dat_final, int_vars2[1], int_vars2[2])

covariate_labels_int2 <- c(
    covariate_labels,
    int_id_labels2
  )
covariate_labels_int2 <- stringr::str_replace_all(covariate_labels_int2, "_", " ")

law_regs_table1_int2 <- create_logit_summary_table(
  models = list(law1_logit_int2, law2_logit_int2, law3_logit_int2, law4_logit_int2),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[1:4],
  covariate.labels = covariate_labels_int2,
  out.file = "law_regs_table1_int2.tex",
  label="tab:law_regs_table1_int2"
)

law_regs_table2_int2 <- create_logit_summary_table(
  models = list(law5_logit_int2, law6_logit_int2, law7_logit_int2),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[5:7],
  covariate.labels = covariate_labels_int2,
  out.file = "law_regs_table2_int2.tex",
  label="tab:law_regs_table2_int2"
)

law_regs_table3_int2 <- create_logit_summary_table(
  models = list(law8_logit_int2, law9_logit_int2, law10_logit_int2),
  title = "Logistic Regression Results for Election Crime Questions",
  dep.var.labels = law_var_labels[8:10],
  covariate.labels = covariate_labels_int2,
  out.file = "law_regs_table3_int2.tex",
  label="tab:law_regs_table3_int2"
)


total_law_regs_table_int2 <- create_logit_summary_table(
  models = list(full_linear_model_int2, full_prop_model_int2),
  title = "Regression Results for Aggregate Answers to Election Crime Questions",
  dep.var.labels = c("Number of Election Crimes Believed", "Proportion of Election Crimes Believed"),
  covariate.labels = covariate_labels_int2,
  out.file = "total_law_reg_int2.tex",
  label="tab:total_law_reg_int2"
)

#### PLOTS ####

# Without by variables
plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot.png", width = 12, height = 16)

# With one by variable
plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Party_Identification,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party.png", width = 12, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Party_Identification,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_ideology.png", width = 12, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Conspiracy_Bin,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_conspiracy.png", width = 12, height = 16)


plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Populism_Bin,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_populism.png", width = 12, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Political_Interest,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_interest.png", width = 12, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Education_Level,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_education.png", width = 12, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Gender,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_gender.png", width = 12, height = 16)


plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Region,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_region.png", width = 12, height = 16)


plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars, 
                        by_var = Urban_Rural,
                        y_var_labels = law_var_labels, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_urban.png", width = 12, height = 16)



# With two by variables
plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Conspiracy_Bin, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_conspiracy.png", width = 20, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Political_Interest, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_interest.png", width = 20, height = 16)


plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Populism_Bin, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_populism.png", width = 20, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Urban_Rural, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_region.png", width = 20, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Urban_Rural, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_urban.png", width = 20, height = 16)


plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Urban_Rural,
                        by_var2 = Region, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_urban_region.png", width = 20, height = 16)

plot_conspiracy_beliefs(dat_final, 
                        y_vars = law_vars,
                        y_var_labels = law_var_labels, 
                        by_var = Party_Identification,
                        by_var2 = Education_Level, 
                        weighted=TRUE
                        )
ggsave("conspiracy_beliefs_plot_party_education.png", width = 20, height = 16)
        

##### FACTOR ANALYSIS ####
library(psych)
library(corrplot)
variables_of_interest <- dat_final[, law_var_labels]
# convert tibble factor to numeric
variables_of_interest <- as.data.frame(lapply(variables_of_interest, as.numeric))
cor_matrix <- cor(variables_of_interest, use = "pairwise.complete.obs")
corrplot(cor_matrix, method = "color")

# Determine the number of factors to extract
scree(variables_of_interest)

# Perform the factor analysis
fa_result <- fa(variables_of_interest, nfactors = 1, rotate = "varimax", fm = "ml")
print(fa_result)

fa_result_2 <- fa(variables_of_interest, nfactors = 2, rotate = "varimax", fm = "ml")
print(fa_result_2)

print(fa_result$TLI)
print(fa_result$RMSEA)

parallel_result <- fa.parallel(variables_of_interest)
